Exploratory Analysis of Demographic Characteristics Where Facilities are Located
# needed packages
library(tidyverse)
# to retrieve census data
library(censusapi)
# to read boundaries file
library(sf)# read prison boundaries file from Homeland Infrastructure Foundation-Level Data Platform
# downloaded in geodatabase form and unzipped, read as sf object
# link: https://hifld-geoplatform.opendata.arcgis.com/datasets/geoplatform::prison-boundaries/about
prison_boundaries <- st_read("./1075f3ca-0050-4264-82e2-079a2daf2ec5.gdb", quiet = TRUE)
# read results that contain block assignment for each facility saved in csv
prison_boundaries_blocks <- read_csv("./prison_blocks.csv")
# for prison boundaries file, get list of all state county combinations represented, including the
# codes for the corresponding states and counties
# resulting data frame has 4 columns -->
# COUNTYFIPS -- 5 digit code consisting of state code and county code
# STATE -- 2 chracter digit state abbreviation
# STATEFIPS -- 2 digit state FIPS
# COUNTYCODE -- 3 digit county code
state_county_list <- st_drop_geometry(prison_boundaries) %>%
group_by(COUNTYFIPS, STATE) %>%
summarize() %>%
select( COUNTYFIPS, STATE) %>%
mutate(STATE = as.character(STATE),
COUNTYFIPS = as.character(COUNTYFIPS),
STATEFIPS = substr(COUNTYFIPS, 1, 2),
COUNTYCODE= substr(COUNTYFIPS,
nchar(COUNTYFIPS) - 2,
nchar(COUNTYFIPS)))
# remove the NOT AVAILABLE entry
state_county_list_filtered <- state_county_list[state_county_list$COUNTYFIPS != "NOT AVAILABLE",]
# get all state fips codes in prison boundaries data
state_codes <- state_county_list_filtered %>%
group_by(STATEFIPS) %>%
summarize() %>%
pull(STATEFIPS)
# can use this function to see what variables are available for the given dataset
# listCensusMetadata(name = "pdb/blockgroup", vintage = 2021, type = "variables", group = NULL)
# get census data for the variables listed at the blockgroup level; block level not available
get_census_data_safe <- function(state_code) {
region_for_state = paste0("state:", state_code, "+county:*+tract:*")
result <- tryCatch(
{
getCensus(name = "pdb/blockgroup",
vintage = 2021,
key = "fbbc8b0d3e53089da7cabc380628d6d46ae00444",
vars = c("Block_group",
"County",
"URBANIZED_AREA_POP_CEN_2010",
"Tot_Population_CEN_2010",
"Tot_Population_ACS_15_19",
"Males_ACS_15_19",
"Females_ACS_15_19",
"Median_Age_ACS_15_19",
"Hispanic_ACS_15_19",
"NH_White_alone_ACS_15_19",
"NH_Blk_alone_ACS_15_19",
"NH_AIAN_alone_ACS_15_19",
"NH_Asian_alone_ACS_15_19",
"NH_NHOPI_alone_ACS_15_19",
"NH_SOR_alone_ACS_15_19",
"Pov_Univ_ACS_15_19",
"Prs_Blw_Pov_Lev_ACS_15_19",
"No_Health_Ins_ACS_15_19",
"Tot_Occp_Units_ACS_15_19",
"Aggregate_HH_INC_ACS_15_19",
"Med_HHD_Inc_BG_ACS_15_19",
"Tot_Prns_in_HHD_ACS_15_19",
"Renter_Occp_HU_ACS_15_19",
"Med_House_Value_BG_ACS_15_19",
"pct_ENG_VW_ACS_15_19",
"pct_URBANIZED_AREA_POP_CEN_2010",
"pct_RURAL_POP_CEN_2010",
"pct_Inst_GQ_CEN_2010",
"pct_Non_Inst_GQ_CEN_2010",
"pct_URBAN_CLUSTER_POP_CEN_2010",
"pct_PUB_ASST_INC_ACS_15_19"
),
regionin = region_for_state,
region = "block group:*")
} ,
error=function(e) {
return()
}
)
return(result)
}
# get block group level data for every state in state_codes - only run when making changes to the variables included in the function above
# blockgroup_level_data <- map_dfr(state_codes, ~get_census_data_safe(.x))
# remove of redundant columns
# blockgroup_level_data <- blockgroup_level_data %>%
# select(-Block_group, County)
# write_csv(blockgroup_level_data, "./blockgroup_level_data.csv")
blockgroup_level_data <- read_csv("./blockgroup_level_data.csv")
# get block group number, which is the first digit of the block number
prison_boundaries_blocks <- prison_boundaries_blocks %>%
mutate(block_group = as.numeric(substr(BLOCKCE10, 1,1)))
# join prison boundaries data to the census data
prison_boundaries_block_census <- left_join(prison_boundaries_blocks, blockgroup_level_data,
by = c("STATEFP10" = "state",
"COUNTYFP10" = "county",
"TRACTCE10" = "tract",
"block_group" = "block_group"))
# all variables in the dataset
all_vars <- prison_boundaries_block_census %>% colnames()Variable definitions are from the documentation for the 2021 Planning Database.
First, we can look at the distributions of the median age in block groups where facilities are located.
Distribution of Median Age
library(forcats)
#### FOR COMPARISON: distribution of median age for all block groups across the U.S.
blockgroup_level_data %>%
select(Median_Age_ACS_15_19) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
geom_histogram() +
geom_vline(aes(xintercept = median(Median_Age_ACS_15_19, na.rm = TRUE)), color = "darkred") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Across the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 10), limits = c(0, 100))# Distribution of Median Age in Block Groups Where Facilities are Located using Median_Age_ACS_15_19 variable
###### distribution of median age by block group
prison_boundaries_block_census %>%
group_by(STATE, COUNTY, TRACTCE10, block_group, Median_Age_ACS_15_19) %>%
summarize() %>%
select(Median_Age_ACS_15_19) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
geom_histogram() +
geom_vline(aes(xintercept = median(Median_Age_ACS_15_19, na.rm = TRUE)), color = "darkred") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Median Age",
subtitle = "Median of the Distribution Marked in Red\n2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_x_continuous(breaks = seq(0, 100, by = 10), limits = c(0,100))###### distribution of median age by facility
prison_boundaries_block_census %>%
select(Median_Age_ACS_15_19) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
geom_histogram() +
geom_vline(aes(xintercept = median(Median_Age_ACS_15_19, na.rm = TRUE)), color = "darkred") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "Median of the Distribution Marked in Red\n2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))# median age distribution faceted by facility type
##### by block group
prison_boundaries_block_census %>%
group_by(STATE, COUNTY, TRACTCE10, block_group, Median_Age_ACS_15_19, TYPE) %>%
summarize() %>%
group_by(TYPE) %>%
mutate(Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
ungroup() %>%
select(Median_Age_ACS_15_19, TYPE, Median) %>%
mutate(TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
ggplot(aes(x = Median_Age_ACS_15_19)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
facet_wrap(~TYPE, scales = "free_y") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Median Age",
subtitle = "Median of the Distribution Marked in Red\n2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))##### by facility
prison_boundaries_block_census %>%
group_by(TYPE) %>%
mutate(Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
select(Median_Age_ACS_15_19, TYPE, Median) %>%
mutate(TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
ggplot(aes(x = Median_Age_ACS_15_19)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
facet_wrap(~TYPE, scales = "free_y") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "Median of the Distribution Marked in Red\n2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))# median age distribution faceted by facility security level
##### by block group
prison_boundaries_block_census %>%
group_by(STATE, COUNTY, TRACTCE10,
block_group, Median_Age_ACS_15_19,
SECURELVL) %>%
summarize() %>%
group_by(SECURELVL) %>%
mutate(Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
ungroup() %>%
select(Median_Age_ACS_15_19, SECURELVL, Median) %>%
mutate(SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
ggplot(aes(x = Median_Age_ACS_15_19)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
facet_wrap(~SECURELVL, scales = "free_y") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))##### by facility
prison_boundaries_block_census %>%
group_by(SECURELVL) %>%
mutate(Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
select(Median_Age_ACS_15_19, SECURELVL, Median) %>%
mutate(SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
ggplot(aes(x = Median_Age_ACS_15_19)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
facet_wrap(~SECURELVL, scales = "free_y") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data ",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))Distribution of the Percentage Urbanized
As defined in the documentation, the variable definitions are as follows:
* pct_URBANIZED_AREA_POP_CEN_2010: “percentage of the 2010 Census total population that lives in a densely settled area containing 50,000 or more people”
* pct_URBAN_CLUSTER_POP_CEN_2010: “percentage of the 2010 Census total population that lives in a densely settled area containing 2,500 to 49,999 people” * pct_RURAL_POP_CEN_2010: “percentage of the 2010 Census total population that lives outside of an urbanized area or urban cluster”
#### FOR COMPARISON -- distribution of percentage urbanized across the full U.S.
blockgroup_level_data %>%
mutate(Median = median(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
Mean = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE)) %>%
select(pct_URBANIZED_AREA_POP_CEN_2010, Median, Mean) %>%
ggplot(aes(x = pct_URBANIZED_AREA_POP_CEN_2010 )) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median),
color = "darkred",
size = 1.2) +
geom_vline(aes(xintercept =Mean),
color = "#718BCE",
size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage Urbanized",
subtitle = "Mean in Blue, Median in Red\n2010 Census Data",
title = "Distribution of the Percentage Urbanized for Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) # distribution of percentage urbanized
# by facility
prison_boundaries_block_census %>%
mutate(Median = median(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
Mean = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE)) %>%
select(pct_URBANIZED_AREA_POP_CEN_2010, Median, Mean) %>%
ggplot(aes(x = pct_URBANIZED_AREA_POP_CEN_2010 )) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median),
color = "darkred",
size = 1.2) +
geom_vline(aes(xintercept =Mean),
color = "#718BCE",
size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage Urbanized",
subtitle = "Mean in Blue, Median in Red\n2010 Census Data",
title = "Distribution of the Percentage Urbanized Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
ylim(0, 4100)# how many block groups with multiple facilities
#prison_boundaries_block_census %>%
# group_by(STATE, COUNTY, TRACTCE10, block_group) %>%
# summarize(n = n()) %>% filter(n > 1 ) %>% nrow()
#### FOR COMPARISON -- distribution of percentage rural across the U.S.
blockgroup_level_data %>%
mutate(Median = median(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
Mean = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE)) %>%
select(pct_RURAL_POP_CEN_2010, Median, Mean) %>%
ggplot(aes(x = pct_RURAL_POP_CEN_2010 )) +
geom_histogram() +
geom_vline(aes(xintercept = Median),
color = "darkred",
size = 1.2) +
geom_vline(aes(xintercept =Mean),
color = "#718BCE",
size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage Rural",
subtitle = "Mean in Blue, Median in Red\n2010 Census Data",
title = "Distribution of the Percentage Rural in Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) # distribution
# distribution of percentage rural
prison_boundaries_block_census %>%
mutate(Median = median(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
Mean = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE)) %>%
select(pct_RURAL_POP_CEN_2010, Median, Mean) %>%
ggplot(aes(x = pct_RURAL_POP_CEN_2010 )) +
geom_histogram() +
geom_vline(aes(xintercept = Median),
color = "darkred",
size = 1.2) +
geom_vline(aes(xintercept =Mean),
color = "#718BCE",
size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage Rural",
subtitle = "Mean in Blue, Median in Red\n2010 Census data ",
title = "Distribution of the Percentage Rural Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
ylim(0, 4100)# for making tables
library(kableExtra)
# table of how many block groups had > 50% rural and how many had < 50% rural
prison_boundaries_block_census %>%
select(pct_URBANIZED_AREA_POP_CEN_2010,
pct_RURAL_POP_CEN_2010,
pct_URBAN_CLUSTER_POP_CEN_2010) %>%
mutate(Density = ifelse(pct_RURAL_POP_CEN_2010 > 50, "Rural", "Urban")) %>%
group_by(Density) %>%
summarize(n =n()) %>%
mutate(Density = replace_na(Density, "Missing"),
total = sum(n),
Percentage = (n / total)*100,
Percentage = paste0(as.character(round(Percentage, 2)), '%')) %>%
select(Density, Percentage) %>%
kbl(caption = "<span style = 'color:black;font-size:2vw;'><b>Percentage of Facilities in a Block Group that was Mostly Rural or Mostly Urban</b></span><span style = 'font-size: 1.5vw; color:black;'><br>Rural block groups are defined here as those where greater than 50% of the 2010 Census total\npopulation for that block group <br> was recorded to live outside of an urbanized area or urban cluster<br></span><i><span style = 'font-size: 1vw'>Data from the 2010 Census</i></span>") %>%
kable_material(c("striped", "hover"), full_width = T) %>%
row_spec(0, background = "#D9DFEE") | Density | Percentage |
|---|---|
| Rural | 31.12% |
| Urban | 68.25% |
| Missing | 0.64% |
# footnote("Rural block groups are defined here as those where greater than 50% of the 2010 Census total\npopulation for that block group was recorded to live outside of an urbanized area or urban cluster.")
# to add tab use  
### FOR COMPARISON -- table with mean and median percentages urbanized, urban cluster, and rural in the U.S.
blockgroup_level_data %>%
summarize(Urban = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
`Urban Cluster` = mean(pct_URBAN_CLUSTER_POP_CEN_2010, na.rm = TRUE),
Rural = mean(pct_RURAL_POP_CEN_2010, na.rm = TRUE)) %>%
pivot_longer(cols = 1:3, names_to = "Population Density Category",
values_to = "Mean Percentage") %>%
cbind("Median Percentage" = c(
median(blockgroup_level_data$pct_URBANIZED_AREA_POP_CEN_2010,
na.rm = TRUE),
median(blockgroup_level_data$pct_URBAN_CLUSTER_POP_CEN_2010,
na.rm = TRUE),
median(blockgroup_level_data$pct_RURAL_POP_CEN_2010, na.rm = TRUE))) %>%
kbl(caption = "<span style = 'color:black;font-size:2vw;'><b>For Block Groups in the United States: Mean and Median Percentages in Urbanized, Urban Cluster, and Rural Areas</b></span><span style = 'float:center'><i><span style = 'font-size: 1vw'><br> Data from the 2010 Census</i></span>") %>%
kable_material(c("striped", "hover"), full_width = F) %>%
row_spec(0, background = "#D9DFEE") | Population Density Category | Mean Percentage | Median Percentage |
|---|---|---|
| Urban | 69.00312 | 100 |
| Urban Cluster | 10.30077 | 0 |
| Rural | 20.69611 | 0 |
# table with mean and median percentages urbanized, urban cluster, and rural
prison_boundaries_block_census %>%
summarize(Urban = mean(pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
`Urban Cluster` = mean(pct_URBAN_CLUSTER_POP_CEN_2010, na.rm = TRUE),
Rural = mean(pct_RURAL_POP_CEN_2010, na.rm = TRUE)) %>%
pivot_longer(cols = 1:3, names_to = "Population Density Category",
values_to = "Mean Percentage") %>%
cbind("Median Percentage" = c(
median(prison_boundaries_block_census$pct_URBANIZED_AREA_POP_CEN_2010, na.rm = TRUE),
median(prison_boundaries_block_census$pct_URBAN_CLUSTER_POP_CEN_2010, na.rm = TRUE),
median(prison_boundaries_block_census$pct_RURAL_POP_CEN_2010, na.rm = TRUE))) %>%
kbl(caption = "<span style = 'color:black;font-size:2vw;'><b> For Block Groups Where Facilities are Located: Mean and Median Percentages in Urbanized, Urban Cluster, and Rural Areas</b></span><span style = 'float:center'><i><span style = 'font-size: 1vw'><br> Data from the 2010 Census</i></span>") %>%
kable_material(c("striped", "hover"), full_width = F) %>%
row_spec(0, background = "#D9DFEE") | Population Density Category | Mean Percentage | Median Percentage |
|---|---|---|
| Urban | 36.63423 | 0.0 |
| Urban Cluster | 29.42237 | 0.0 |
| Rural | 33.94341 | 7.6 |
We can also see how the distribution of age varies depending on whether blocks are primarily urban or rural, among different facility types, and among different facility security levels.
###### distribution of median age in rural versus urban settings, where rural blocks are defined as those
# where more than 50% of the population is rural
# mean of the distribution is higher in rural areas, a trend that has been reported elsewhere
prison_boundaries_block_census %>%
mutate(Density = ifelse(pct_RURAL_POP_CEN_2010 > 50,
"Rural", "Urban")) %>%
group_by(Density, TYPE) %>%
mutate(Mean = mean(Median_Age_ACS_15_19,
na.rm =TRUE),
Median = median(Median_Age_ACS_15_19,
na.rm = TRUE)) %>%
filter(!is.na(Density)) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
facet_grid(TYPE~Density, scales = "free_y") +
geom_histogram(alpha = .8) +
geom_vline(aes(xintercept = Median ), color = "darkred") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data & 2010 Census Data",
title = "Distribution of Median Age in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))# faceted by rural/urban status and facility type
prison_boundaries_block_census %>%
mutate(Density = ifelse(pct_RURAL_POP_CEN_2010 > 50,
"Rural", "Urban")) %>%
group_by(Density, TYPE) %>%
mutate(Mean = mean(Median_Age_ACS_15_19, na.rm =TRUE),
Median = median(Median_Age_ACS_15_19, na.rm = TRUE)) %>%
filter(!is.na(Density)) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
facet_grid(TYPE~Density, scales = "free_y") +
geom_histogram(alpha = .8) +
geom_vline(aes(xintercept = Median ), color = "darkred") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data & 2010 Census Data",
title = "Distribution of Median Age in Block Groups Where Facilities are Located\n by Population Density Category and Faility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))# faceted by rural/urban status and county type
prison_boundaries_block_census %>%
mutate(Density = ifelse(pct_RURAL_POP_CEN_2010 > 50,
"Rural", "Urban")) %>%
group_by(Density, SECURELVL) %>%
mutate(Mean = mean(Median_Age_ACS_15_19, na.rm =TRUE),
Median = median(Median_Age_ACS_15_19, na.rm = TRUE),
SECURELVL = factor(SECURELVL,
levels = c("JUVENILE",
"MINIMUM", "MEDIUM",
"MAIMUM", "CLOSE",
"NOT AVAILABLE"))) %>%
filter(!is.na(Density)) %>%
ggplot(aes(x = Median_Age_ACS_15_19 )) +
facet_grid(SECURELVL~Density, scales = "free_y") +
geom_histogram(alpha = .8) +
geom_vline(aes(xintercept = Median ), color = "darkred") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Median Age",
subtitle = "2015 – 2019 5-year ACS sample data & 2010 Census Data",
title = "Distribution of Median Age in Block Groups Where Facilities are Located\n by Population Density Category and Security Level") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))Distribution of Racial Percentages for each Racial Category
# prison_blocks_race
#
# mutate(sum_all = Hispanic_ACS_15_19 + NH_AIAN_alone_ACS_15_19 + NH_Asian_alone_ACS_15_19 + NH_Blk_alone_ACS_15_19 + NH_NHOPI_alone_ACS_15_19 + NH_SOR_alone_ACS_15_19 + NH_White_alone_ACS_15_19) %>% View()
# for race reference csv, copied and pasted definitions at link here https://api.census.gov/data/2021/pdb/blockgroup/variables.html into excel file
race_ref <- read_csv("./races_reference.csv", col_names = FALSE) %>%
select(X1, X2) %>%
rename(`Racial Category` = X1,
Description = X2) %>%
mutate(Description = str_replace(Description, "in the ACS", ""),
Description = str_replace(Description, ",", ",\n")) %>%
filter(!str_detect(Description, "Census") & !str_detect(Description, "MOE") )
blockgroup_level_data %>%
select(
Hispanic_ACS_15_19, NH_AIAN_alone_ACS_15_19,
NH_Asian_alone_ACS_15_19, NH_Blk_alone_ACS_15_19,
NH_NHOPI_alone_ACS_15_19, NH_SOR_alone_ACS_15_19,
NH_White_alone_ACS_15_19, Tot_Population_ACS_15_19) %>%
mutate(across(1:ncol(.), ~ (.x / Tot_Population_ACS_15_19) * 100)) %>% # get percentage by dividing by total for that block
select(-Tot_Population_ACS_15_19) %>%
pivot_longer(1:ncol(.), names_to = "Racial Category", values_to ="Percentage") %>%
left_join(race_ref, by = c("Racial Category" = "Racial Category")) %>%
group_by(Description) %>%
mutate(Median = median(Percentage, na.rm = TRUE),
Mean = mean(Percentage, na.rm = TRUE)) %>%
ggplot(aes(x = Percentage)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
facet_wrap(~Description, scales = "free_y", ncol = 2) +
scale_x_continuous(breaks = seq(0, 100, by = 10)) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median in Red, Median in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of Racial Percentages in Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))prison_race <- prison_boundaries_block_census %>%
select(FACILITYID, TYPE, SECURELVL,
Hispanic_ACS_15_19, NH_AIAN_alone_ACS_15_19,
NH_Asian_alone_ACS_15_19, NH_Blk_alone_ACS_15_19,
NH_NHOPI_alone_ACS_15_19, NH_SOR_alone_ACS_15_19,
NH_White_alone_ACS_15_19, Tot_Population_ACS_15_19) %>%
mutate(across(4:ncol(.), ~ (.x / Tot_Population_ACS_15_19) * 100)) %>% # get percentage by dividing by total for that block
select(-Tot_Population_ACS_15_19) %>%
pivot_longer(4:ncol(.), names_to = "Racial Category", values_to ="Percentage") %>%
left_join(race_ref, by = c("Racial Category" = "Racial Category"))
prison_race %>%
group_by(Description) %>%
mutate(Median = median(Percentage, na.rm = TRUE),
Mean = mean(Percentage, na.rm = TRUE)) %>%
ggplot(aes(x = Percentage)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
facet_wrap(~Description, scales = "free_y", ncol = 2) +
scale_x_continuous(breaks = seq(0, 100, by = 10)) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of Racial Percentages in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5))Distribution of Percentage without Health Insurance
blockgroup_level_data %>%
mutate(pct_no_insurance = (No_Health_Ins_ACS_15_19 / Tot_Population_ACS_15_19) * 100,
Mean = mean(pct_no_insurance, na.rm = TRUE),
Median = median(pct_no_insurance, na.rm = TRUE)) %>%
ggplot(aes(x = pct_no_insurance)) +
geom_histogram() +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Lacking Health Insurance in Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(0,100)# faceted by facility type
prison_boundaries_block_census %>%
mutate(pct_no_insurance = (No_Health_Ins_ACS_15_19 / Tot_Population_ACS_15_19) * 100) %>%
group_by(TYPE) %>%
mutate(
Mean = mean(pct_no_insurance, na.rm = TRUE),
Median = median(pct_no_insurance, na.rm = TRUE),
TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
ggplot(aes(x = pct_no_insurance)) +
geom_histogram() +
facet_wrap(~TYPE, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Lacking Health Insurance in Block Groups Where Facilities are Located\nFaceted By Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(0,100)# faceted by security level
prison_boundaries_block_census %>%
mutate(pct_no_insurance = (No_Health_Ins_ACS_15_19 / Tot_Population_ACS_15_19) * 100) %>%
group_by(SECURELVL) %>%
mutate(
Mean = mean(pct_no_insurance, na.rm = TRUE),
Median = median(pct_no_insurance, na.rm = TRUE),
SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
ggplot(aes(x = pct_no_insurance)) +
geom_histogram() +
facet_wrap(~SECURELVL, scales = "free_y", ncol = 2) +
geom_vline(aes(xintercept = Median), color = "darkred") +
geom_vline(aes(xintercept = Mean), color = "#718BCE") +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Lacking Health Insurance in Block Groups Where Facilities are Located\nFaceted By Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(0,100)Distribution of Percentages Below the Poverty Level
Referencing the documentation:
Prs_Blw_Pov_Lev_ACS_15_19 is defined as the “Number of people classified as below the poverty level given their total family or household income within the last year, family size, and family composition in the ACS population”.
To calculate the percentage, we can use the Pov_Univ_ACS_15_19 variable, which is defined as the “Population for whom poverty level is determined”.
#### FOR COMPARISON -- poverty distribution for all block groups in the U.S.
blockgroup_level_data %>%
mutate(pct_below_pov = (Prs_Blw_Pov_Lev_ACS_15_19 / Pov_Univ_ACS_15_19) * 100,
Mean = mean(pct_below_pov, na.rm = TRUE),
Median = median(pct_below_pov, na.rm = TRUE)) %>%
ggplot(aes(x = pct_below_pov)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Below the Poverty Line in Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# for block groups where facilities are located
prison_boundaries_block_census %>%
mutate(pct_below_pov = (Prs_Blw_Pov_Lev_ACS_15_19 / Pov_Univ_ACS_15_19) * 100,
Mean = mean(pct_below_pov, na.rm = TRUE),
Median = median(pct_below_pov, na.rm = TRUE)) %>%
ggplot(aes(x = pct_below_pov)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Below the Poverty Line in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# by facility type
prison_boundaries_block_census %>%
mutate(pct_below_pov = (Prs_Blw_Pov_Lev_ACS_15_19 / Pov_Univ_ACS_15_19) * 100,
TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
group_by(TYPE) %>%
mutate(Mean = mean(pct_below_pov, na.rm = TRUE),
Median = median(pct_below_pov, na.rm = TRUE)) %>%
ggplot(aes(x = pct_below_pov)) +
geom_histogram(alpha = .9) +
facet_wrap(~TYPE, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Below the Poverty Line in Block Groups Where Facilities are Located\nFaceted by Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# by security level
prison_boundaries_block_census %>%
mutate(pct_below_pov = (Prs_Blw_Pov_Lev_ACS_15_19 / Pov_Univ_ACS_15_19) * 100,
SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
group_by(SECURELVL) %>%
mutate(Mean = mean(pct_below_pov, na.rm = TRUE),
Median = median(pct_below_pov, na.rm = TRUE)) %>%
ggplot(aes(x = pct_below_pov)) +
geom_histogram(alpha = .9) +
facet_wrap(~SECURELVL, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage Below the Poverty Line in Block Groups Where Facilities are Located\nFaceted by Security Level") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)Distribution of the Percentage of Housing Units Rented
Renter_Occp_HU_ACS_15_19 is defined as “Number of ACS occupied housing units that are not owner occupied, whether they are rented or occupied without payment of rent”.
# distribution of percentage of housing units rented
blockgroup_level_data %>%
mutate(pct_rented = (Renter_Occp_HU_ACS_15_19 / Tot_Occp_Units_ACS_15_19) * 100,
Mean = mean(pct_rented, na.rm = TRUE),
Median = median(pct_rented, na.rm = TRUE)) %>%
ggplot(aes(x = pct_rented)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage of Occupied Housing Units Not Owner Occupied\nin Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)prison_boundaries_block_census %>%
mutate(pct_rented = (Renter_Occp_HU_ACS_15_19 / Tot_Occp_Units_ACS_15_19) * 100,
Mean = mean(pct_rented, na.rm = TRUE),
Median = median(pct_rented, na.rm = TRUE)) %>%
ggplot(aes(x = pct_rented)) +
geom_histogram(alpha = .9) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage of Occupied Housing Units Not Owner Occupied\n in Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# by facility type
prison_boundaries_block_census %>%
mutate(pct_rented = (Renter_Occp_HU_ACS_15_19 / Tot_Occp_Units_ACS_15_19) * 100,
TYPE = factor(TYPE, levels = c("LOCAL", "COUNTY",
"STATE", "FEDERAL",
"MULTI", "NOT AVAILABLE"))) %>%
group_by(TYPE) %>%
mutate(Mean = mean(pct_rented, na.rm = TRUE),
Median = median(pct_rented, na.rm = TRUE)) %>%
ggplot(aes(x = pct_rented)) +
geom_histogram(alpha = .9) +
facet_wrap(~TYPE, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage of Occupied Housing Units Not Owner Occupied\n in Block Groups Where Facilities are Located\nFaceted by Facility Type") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)# by security level
prison_boundaries_block_census %>%
mutate(pct_rented = (Renter_Occp_HU_ACS_15_19 / Tot_Occp_Units_ACS_15_19) * 100,
SECURELVL = factor(SECURELVL, levels = c("JUVENILE", "MINIMUM",
"MEDIUM", "MAXIMUM",
"CLOSE", "NOT AVAILABLE"))) %>%
group_by(SECURELVL) %>%
mutate(Mean = mean(pct_rented, na.rm = TRUE),
Median = median(pct_rented, na.rm = TRUE)) %>%
ggplot(aes(x = pct_rented)) +
geom_histogram(alpha = .9) +
facet_wrap(~SECURELVL, scales = "free_y") +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Percentage",
subtitle = "Median Percentages in Red, Mean Percentages in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Percentage of Occupied Housing Units Not Owner Occupied\n in Block Groups Where Facilities are Located\nFaceted by Security Level") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) +
xlim(-5,100)Distribution of Average Household Size
Med_House_Value_BG_ACS_15_19 is defined as the “Median of ACS respondents’ house value estimates per block group”.
# distribution of average household size for block groups in the U.S.
blockgroup_level_data %>%
mutate(avg_house = Tot_Prns_in_HHD_ACS_15_19 / Tot_Occp_Units_ACS_15_19,
Mean = mean(avg_house, na.rm = TRUE),
Median = median(avg_house, na.rm = TRUE)) %>%
ggplot(aes(x = avg_house)) +
geom_histogram(alpha = .9, bins = 40) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Block Groups",
x = "Number of People",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Average Household Size\nin Block Groups in the U.S.") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic", hjust = .5),
plot.title = element_text(face = "bold", hjust = 0.5)) + xlim(0, 8)prison_boundaries_block_census %>%
mutate(avg_house = Tot_Prns_in_HHD_ACS_15_19 / Tot_Occp_Units_ACS_15_19,
Mean = mean(avg_house, na.rm = TRUE),
Median = median(avg_house, na.rm = TRUE)) %>%
ggplot(aes(x = avg_house)) +
geom_histogram(alpha = .9, bins = 40) +
geom_vline(aes(xintercept = Median), color = "darkred", size = 1.2) +
geom_vline(aes(xintercept = Mean), color = "#718BCE", size = 1.2) +
theme_bw() +
labs(y = "Number of Facilities",
x = "Number of People",
subtitle = "Median Percentage in Red, Mean Percentage in Blue\n2015 – 2019 5-year ACS sample data",
title = "Distribution of the Average Household Size\nin Block Groups Where Facilities are Located") +
theme(text = element_text(family = "Optima"),
plot.subtitle = element_text(face = "italic",
hjust = .5),
plot.title = element_text(face = "bold",
hjust = 0.5)) +
xlim(0, 8)Distribution of Percentage Institutionalized
pct_Inst_GQ_CEN_2010is defined as “The percentage of the 2010 Census population who live in group quarters and are primarily ineligible, unable, or unlikely to participate in labor force while residents. Institutional group quarters include correctional facilities for adults, juvenile facilities, nursing facilities, and other institutional facilities.”
get_census_data_tract <- function(state_code) {
region_for_state = paste0("state:", state_code, "+county:*")
result <- tryCatch(
{
getCensus(name = "pdb/tract",
vintage = 2021,
key = "fbbc8b0d3e53089da7cabc380628d6d46ae00444",
vars = c(
"pct_Pop_NoCompDevic_ACS_15_19",
"pct_Pop_w_BroadComp_ACS_15_19",
"pct_Civ_unemp_16p_ACS_15_19"
),
regionin = region_for_state,
region = "tract:*")
} ,
error=function(e) {
return()
}
)
return(result)
}
### NOTE -- attempted trying to get the following variables as well, but they just returned NAs
# Aggregate_HH_INC_ACS_15_19
# Med_HHD_Inc_ACS_15_19
# avg_Agg_HH_INC_ACS_15_19
# Med_HHD_Inc_ACS_15_19_1
# Med_House_Value_ACS_15_19
# get tract level data for all states
tract_level_data <- map_dfr(state_codes, ~get_census_data_tract(.x))
state_code = "01"
region_for_state = paste0("state:", state_code, "+county:*")
t <- getCensus(name = "pdb/tract",
vintage = 2021,
key = "fbbc8b0d3e53089da7cabc380628d6d46ae00444",
vars = c(
"pct_Pop_NoCompDevic_ACS_15_19",
"pct_Pop_w_BroadComp_ACS_15_19",
"pct_Civ_unemp_16p_ACS_15_19"
),
regionin = region_for_state,
region = "tract:*")